import pandas as pd
import random
from sklearn.model_selection import train_test_split

# Load the raw dataset from disk.
df = pd.read_csv("data/small_data_10MB.csv")

# Hold out 40% of the rows as the test set and keep the remaining 60% for
# training; random_state is fixed so repeated runs produce the same split.
train_data, test_data = train_test_split(
    df,
    test_size=0.4,
    random_state=42,
)

def save_to_file(data: pd.DataFrame, filename: str) -> None:
    """Write the ``label`` and ``text`` columns of *data* to a text file.

    Each row becomes one line: the label, a tab character, the text, and a
    trailing newline. The file is encoded as UTF-8 and any existing file at
    *filename* is overwritten.

    The two columns are read directly instead of going through
    ``DataFrame.iterrows()``. ``iterrows`` builds a Series for every row,
    which is slow and does not preserve dtypes: in an all-numeric frame an
    integer label would be upcast and written as ``1.0``. Column access keeps
    each column's own dtype.

    NOTE: values are written verbatim. A tab or newline inside ``text`` is
    not escaped and would break the one-record-per-line layout.

    Args:
        data: Frame containing at least the ``label`` and ``text`` columns.
        filename: Path of the output file.

    Raises:
        KeyError: If ``label`` or ``text`` is missing from *data*.
        OSError: If the file cannot be opened or written.
    """
    # Look the columns up before opening the file so that a missing column
    # fails without truncating an existing output file.
    labels = data["label"]
    texts = data["text"]
    with open(filename, "w", encoding="utf-8") as f:
        # One buffered writelines call instead of a separate write per row.
        f.writelines(
            f"{label}\t{text}\n" for label, text in zip(labels, texts)
        )

# Write each split to its own file: training rows first, then test rows.
for _split, _out_path in ((train_data, "train.txt"), (test_data, "test.txt")):
    save_to_file(_split, _out_path)

# Report completion to the user.
print("数据集转换完成！")
